// ANES24Ethnicity
* Close any log left open by an earlier, interrupted run; otherwise -log using- stops with "log file already open".
* -capture- suppresses the error when no log is open.
capture log close
log using ANES24Ethnicity.log, replace

* ANES 2024 Time Series Study
* Preliminary Release: Pre-Election Data, February 19, 2025 version
* -clear- lets the do-file be re-run when a (possibly modified) dataset is already in memory.
use "C:\Users\sbstjp\OneDrive - Cardiff University\anes_timeseries_2024_stata_20250219.dta", clear

// Create social justice scale
* Give the four scale items readable names in a single group rename
* (old names in the first list map position-by-position onto the second list)
rename (V241290x V241372x V241375x V241412x) ///
       (Edi      Tgbathroom Tgsport Appprotestgaza)

* Set negative values (used here as missing-data codes) to system missing,
* then tabulate each item with the missing category shown as a check
foreach item of varlist Edi Tgbathroom Tgsport Appprotestgaza {
    replace `item' = . if `item' < 0
    tabulate `item', missing
}

* Reverse-code three items so that, per the scale's design, higher values mean
* stronger social justice values. Each new variable is (observed max + 1 - value)
* and is named with an r prefix: rAppprotestgaza, rTgbathroom, rEdi.
* Tgsport is not reversed.
foreach item of varlist Appprotestgaza Tgbathroom Edi {
    * meanonly skips the variance calculation but still returns r(max)
    quietly summarize `item', meanonly
    generate r`item' = r(max) + 1 - `item'
}

* Rescale every scale item to the range 1-2 (observed minimum maps to 1, observed maximum to 2).
* The range starts at 1 rather than 0 so that a valid response is never exactly 0.
* New variables carry an s prefix: srEdi, srTgbathroom, sTgsport, srAppprotestgaza.
tempname lo span
foreach item of varlist rEdi rTgbathroom Tgsport rAppprotestgaza {
    summarize `item'
    * Hold the observed minimum and range in temporary scalars for the rescaling formula
    scalar `lo' = r(min)
    scalar `span' = r(max) - r(min)
    generate s`item' = 1 + (`item' - `lo') / `span'
}

*At this point, the scale has a Cronbach's alpha of 0.76. 

* Build the scale score as the mean of the items each respondent actually answered.
* The item variables are left untouched: missing responses stay missing instead of
* being overwritten with 0, so the items remain usable afterwards (e.g. for -alpha-).
local sjv_items srEdi srTgbathroom sTgsport srAppprotestgaza

* Sum of the answered items; rowtotal() treats a missing item as contributing 0
egen total_scoreSJV = rowtotal(`sjv_items')

* Number of answered (non-missing) items per respondent.
* The variable name is kept for compatibility; valid items lie in 1-2, so
* "non-zero" and "non-missing" identify the same responses.
egen count_nonzeroSJV = rownonmiss(`sjv_items')

* Average over answered items; stays missing when no item was answered (avoids division by zero)
gen SocJusValues = total_scoreSJV / count_nonzeroSJV if count_nonzeroSJV > 0

// Demographics
* Rename the age, income and race summary variables
rename (V241458x V241566x V241501x) (age income race)

* Set negative values (missing-data codes) to system missing.
* The same rule is applied to all three variables, so any negative code on race
* is treated as missing rather than only -9, -8 and -4.
foreach v of varlist age income race {
    replace `v' = . if `v' < 0
}

*Generate dummies
* FemaleGender: 1 if V241551 == 2, 0 if V241551 == 1, missing for every other value.
* Coded 0/1 so it is a true indicator variable; compared with a 1/2 coding the
* regression slope is identical and only the constant shifts.
* NOTE(review): assumes code 2 is the female category and code 1 the male category - confirm against the ANES codebook.
gen FemaleGender = (V241551 == 2) if inlist(V241551, 1, 2)

* Graduate: 1 for V241465x codes 4-5, 0 for codes 1-3, missing for every other value.
* NOTE(review): presumably codes 4-5 are degree-level education - confirm against the ANES codebook.
gen Graduate = inlist(V241465x, 4, 5) if inrange(V241465x, 1, 3) | inlist(V241465x, 4, 5)

* Race/ethnicity indicators built from the integer-coded race variable.
* Each indicator is 1 for its own code, 0 for the other codes in 1-6,
* and missing when race is missing or outside 1-6.
* Codes used: 1 = White, 2 = Black, 3 = Hispanic, 4 = AsianAndOther.
* NOTE(review): AsianAndOther is 1 only for code 4; codes 5 and 6 are coded 0 in
* all four indicators - confirm this matches the intended grouping.
gen Black = (race == 2) if inrange(race, 1, 6)

gen Hispanic = (race == 3) if inrange(race, 1, 6)

gen AsianAndOther = (race == 4) if inrange(race, 1, 6)

gen White = (race == 1) if inrange(race, 1, 6)

// Standardize variables
* Create standardized versions of age and income with egen std().
* The new variables are the capitalised names Age and Income
* (Stata variable names are case-sensitive, so they do not clash with age and income).
local raw_vars age income
local std_vars Age Income
forvalues i = 1/2 {
    egen `: word `i' of `std_vars'' = std(`: word `i' of `raw_vars'')
}

// Regressions
* One weighted OLS model per race/ethnicity indicator, all with the same controls.
* eststo and esttab come from the user-written estout package.
* Drop estimates stored earlier in this session so that esttab tabulates only the
* four models below when the do-file is re-run.
eststo clear
foreach group in White Black Hispanic AsianAndOther {
    regress SocJusValues Age FemaleGender Graduate Income `group' [pweight=V240105a], robust
    eststo
}
* Side-by-side table of the four stored models
esttab

//Correlations
* Weighted pairwise correlations between the scale and each race/ethnicity indicator, with significance levels
pwcorr SocJusValues White Black Hispanic AsianAndOther [aweight=V240105a], sig
log close